﻿import re
import requests
from urllib import error
from bs4 import BeautifulSoup
import os
from sys import argv

# Module-level state shared by the download helper and the script entry point
num, numPicture = 0, 0  # pictures saved so far, number of pictures requested
file = ''  # destination directory for the downloaded pictures

 
# Download pictures
def dowmloadPicture(html, keyword):
    """Download the JPEG images referenced in a search-result page.

    Scans ``html`` for ``"img":"<url>.jpg`` entries, fetches each URL and
    saves the response body as ``<keyword>_<n>.jpg`` inside the directory
    named by the module-level ``file``. ``n`` is the module-level counter
    ``num``, which is incremented after every saved picture. The function
    returns as soon as ``num`` reaches the module-level ``numPicture``.

    Pictures that cannot be fetched (network error, bad URL, HTTP error
    status) are skipped. Errors while writing the file are not caught.

    Args:
        html: Text of the search-result page.
        keyword: Search keyword, used as the file-name prefix.

    Returns:
        None.
    """
    global num
    # Locate the image URLs. The dot before "jpg" is escaped so it matches
    # only a literal ".", and the URL may not contain a double quote, so a
    # match can never run past the end of its own "img" value into another
    # field of the page.
    pic_url = re.findall(r'"img":"([^"]*?)\.jpg', html)

    for each in pic_url:
        # Restore the extension consumed by the pattern and strip the
        # backslashes from the captured URL.
        each = (each + '.jpg').replace('\\', '')
        print(each)
        try:
            pic = requests.get(each, timeout=10)
            # Do not store an error page under a .jpg name.
            pic.raise_for_status()
        except (requests.RequestException, ValueError):
            # Best effort: skip this picture. KeyboardInterrupt and
            # SystemExit are deliberately not swallowed.
            continue

        # os.path.join keeps the path valid on every platform.
        string = os.path.join(file, keyword + '_' + str(num) + '.jpg')
        with open(string, 'wb') as fp:
            fp.write(pic.content)
        num += 1
        if num >= numPicture:
            return

if __name__ == '__main__':  # script entry point

    # Read the keyword, the number of pictures and the destination directory
    # from the command line.
    if len(argv) < 4:
        raise SystemExit('usage: ' + argv[0] + ' <keyword> <count> <directory>')
    word = argv[1]
    try:
        numPicture = int(argv[2])
    except ValueError:
        raise SystemExit('count must be an integer, got ' + repr(argv[2]))
    file = argv[3]

    # Search endpoint on image.so.com: "q" carries the keyword and "sn" the
    # result offset. Both are passed through ``params`` so that requests
    # URL-encodes the keyword (spaces, "&", "#", ...).
    url = 'https://image.so.com/i'

    page = 0

    # Create the destination directory (including missing parents) if needed.
    os.makedirs(file, exist_ok=True)

    # Download until enough pictures are saved. Give up after a few
    # consecutive pages that add nothing, otherwise the loop would never end
    # when the search yields fewer pictures than requested.
    max_empty_pages = 3
    empty_pages = 0
    while num < numPicture:
        saved_before = num
        result = requests.get(url, params={'q': word, 'sn': page}, timeout=10)
        dowmloadPicture(result.text, word)
        page += 60  # NOTE(review): presumably the page size of the site - confirm
        if num == saved_before:
            empty_pages += 1
            if empty_pages >= max_empty_pages:
                print('no more pictures found; saved ' + str(num) + ' of ' + str(numPicture))
                break
        else:
            empty_pages = 0
